{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named 'paddlehub'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-8-9ddacf036f2f>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpyplot\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mplt\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      4\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mcollections\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mCounter\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 5\u001b[1;33m \u001b[1;32mimport\u001b[0m \u001b[0mpaddlehub\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mhub\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      6\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mpaddle\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      7\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmodel_selection\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mtrain_test_split\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'paddlehub'"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from collections import Counter\n",
    "import paddlehub as hub\n",
    "import paddle\n",
    "from sklearn.model_selection import train_test_split\n",
    "from paddlehub.datasets.base_nlp_dataset import TextClassificationDataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple\n",
      "Collecting paddlehub\n",
      "  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/4b/40/27c86a86d7697bd503caf84890f50503f86bab4330e848629d6f37625d3f/paddlehub-2.4.0-py3-none-any.whl (213 kB)\n",
      "Requirement already satisfied: colorama in c:\\users\\administrator\\anaconda3\\lib\\site-packages (from paddlehub) (0.3.9)\n",
      "Collecting colorlog (from paddlehub)\n",
      "  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/e3/51/9b208e85196941db2f0654ad0357ca6388ab3ed67efdbfc799f35d1f83aa/colorlog-6.9.0-py3-none-any.whl (11 kB)\n",
      "Collecting easydict (from paddlehub)\n",
      "  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/05/ec/fa6963f1198172c2b75c9ab6ecefb3045991f92f75f5eb41b6621b198123/easydict-1.13-py3-none-any.whl (6.8 kB)\n",
      "Requirement already satisfied: filelock in c:\\users\\administrator\\anaconda3\\lib\\site-packages (from paddlehub) (3.0.8)\n",
      "Collecting flask>=1.1.0 (from paddlehub)\n",
      "  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/9f/1a/8b6d48162861009d1e017a9740431c78d860809773b66cac220a11aa3310/Flask-2.2.5-py3-none-any.whl (101 kB)\n",
      "Requirement already satisfied: numpy in c:\\users\\administrator\\anaconda3\\lib\\site-packages (from paddlehub) (1.21.6)\n",
      "Requirement already satisfied: matplotlib in c:\\users\\administrator\\anaconda3\\lib\\site-packages (from paddlehub) (2.2.3)\n",
      "Collecting opencv-python (from paddlehub)\n",
      "  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/a4/7d/f1c30a92854540bf789e9cd5dde7ef49bbe63f855b85a2e6b3db8135c591/opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl (39.5 MB)\n",
      "Requirement already satisfied: packaging in c:\\users\\administrator\\anaconda3\\lib\\site-packages (from paddlehub) (17.1)\n",
      "Collecting paddle2onnx>=0.5.1 (from paddlehub)\n",
      "  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/bb/e6/47b7b83f75d631146a9728d85d7705684d66bab1018d8a5d8eecaa37599d/paddle2onnx-1.0.6-cp37-cp37m-win_amd64.whl (1.7 MB)\n",
      "Collecting paddlenlp>=2.0.0 (from paddlehub)\n",
      "  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/44/62/98dd0ca2f6600ca1dfc9c59ba1b40628df5f7948abc85ba16c3367c49cf4/paddlenlp-2.8.1-py3-none-any.whl (2.9 MB)\n",
      "Requirement already satisfied: Pillow in c:\\users\\administrator\\anaconda3\\lib\\site-packages (from paddlehub) (5.2.0)\n",
      "Requirement already satisfied: pyyaml in c:\\users\\administrator\\anaconda3\\lib\\site-packages (from paddlehub) (3.13)\n",
      "Requirement already satisfied: pyzmq in c:\\users\\administrator\\anaconda3\\lib\\site-packages (from paddlehub) (17.1.2)\n",
      "Collecting rarfile (from paddlehub)\n",
      "  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/62/fc/ab37559419ca36dd8dd317c3a98395ed4dcee2beeb28bf6059b972906727/rarfile-4.2-py3-none-any.whl (29 kB)\n",
      "Requirement already satisfied: tqdm in c:\\users\\administrator\\anaconda3\\lib\\site-packages (from paddlehub) (4.26.0)\n",
      "Collecting visualdl>=2.0.0 (from paddlehub)\n",
      "  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/ea/b5/37726c750a4f4598660998327c3566b2d2ed5a1a5f44e9f0dde875602447/visualdl-2.5.3-py3-none-any.whl (6.3 MB)\n",
      "Collecting gradio (from paddlehub)\n",
      "  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/29/e9/592634ca97719735608b0cf92b05e5061cfaaa0e77d0c1ed24017273b525/gradio-3.34.0-py3-none-any.whl (20.0 MB)\n",
      "Collecting Werkzeug>=2.2.2 (from flask>=1.1.0->paddlehub)\n",
      "  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/f6/f8/9da63c1617ae2a1dec2fbf6412f3a0cfe9d4ce029eccbda6e1e4258ca45f/Werkzeug-2.2.3-py3-none-any.whl (233 kB)\n",
      "Collecting Jinja2>=3.0 (from flask>=1.1.0->paddlehub)\n",
      "  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl (134 kB)\n",
      "Collecting itsdangerous>=2.0 (from flask>=1.1.0->paddlehub)\n",
      "  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/68/5f/447e04e828f47465eeab35b5d408b7ebaaaee207f48b7136c5a7267a30ae/itsdangerous-2.1.2-py3-none-any.whl (15 kB)\n",
      "Collecting click>=8.0 (from flask>=1.1.0->paddlehub)\n",
      "  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl (98 kB)\n",
      "Collecting importlib-metadata>=3.6.0 (from flask>=1.1.0->paddlehub)\n",
      "  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/ff/94/64287b38c7de4c90683630338cf28f129decbba0a44f0c6db35a873c73c4/importlib_metadata-6.7.0-py3-none-any.whl (22 kB)\n",
      "Requirement already satisfied: six in c:\\users\\administrator\\anaconda3\\lib\\site-packages (from paddle2onnx>=0.5.1->paddlehub) (1.11.0)\n",
      "Collecting jieba (from paddlenlp>=2.0.0->paddlehub)\n",
      "  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/c6/cb/18eeb235f833b726522d7ebed54f2278ce28ba9438e3135ab0278d9792a2/jieba-0.42.1.tar.gz (19.2 MB)\n",
      "  Preparing metadata (setup.py): started\n",
      "  Preparing metadata (setup.py): finished with status 'done'\n",
      "Collecting seqeval (from paddlenlp>=2.0.0->paddlehub)\n",
      "  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/9d/2d/233c79d5b4e5ab1dbf111242299153f3caddddbb691219f363ad55ce783d/seqeval-1.2.2.tar.gz (43 kB)\n",
      "  Preparing metadata (setup.py): started\n",
      "  Preparing metadata (setup.py): finished with status 'error'\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  error: subprocess-exited-with-error\n",
      "  \n",
      "  python setup.py egg_info did not run successfully.\n",
      "  exit code: 1\n",
      "  \n",
      "  [27 lines of output]\n",
      "  Download error on https://pypi.org/simple/setuptools_scm/: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1045) -- Some packages may not be found!\n",
      "  Download error on https://pypi.org/simple/setuptools-scm/: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1045) -- Some packages may not be found!\n",
      "  Couldn't find index page for 'setuptools_scm' (maybe misspelled?)\n",
      "  Download error on https://pypi.org/simple/: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1045) -- Some packages may not be found!\n",
      "  No local packages or working download links found for setuptools_scm\n",
      "  Traceback (most recent call last):\n",
      "    File \"<string>\", line 36, in <module>\n",
      "    File \"<pip-setuptools-caller>\", line 34, in <module>\n",
      "    File \"C:\\Users\\ADMINI~1\\AppData\\Local\\Temp\\pip-install-sthp1x1m\\seqeval_06495e7b30c345258a3b8d328a4d4992\\setup.py\", line 56, in <module>\n",
      "      'Programming Language :: Python :: Implementation :: PyPy'\n",
      "    File \"c:\\users\\administrator\\anaconda3\\lib\\site-packages\\setuptools\\__init__.py\", line 139, in setup\n",
      "      _install_setup_requires(attrs)\n",
      "    File \"c:\\users\\administrator\\anaconda3\\lib\\site-packages\\setuptools\\__init__.py\", line 134, in _install_setup_requires\n",
      "      dist.fetch_build_eggs(dist.setup_requires)\n",
      "    File \"c:\\users\\administrator\\anaconda3\\lib\\site-packages\\setuptools\\dist.py\", line 514, in fetch_build_eggs\n",
      "      replace_conflicting=True,\n",
      "    File \"c:\\users\\administrator\\anaconda3\\lib\\site-packages\\pkg_resources\\__init__.py\", line 777, in resolve\n",
      "      replace_conflicting=replace_conflicting\n",
      "    File \"c:\\users\\administrator\\anaconda3\\lib\\site-packages\\pkg_resources\\__init__.py\", line 1060, in best_match\n",
      "      return self.obtain(req, installer)\n",
      "    File \"c:\\users\\administrator\\anaconda3\\lib\\site-packages\\pkg_resources\\__init__.py\", line 1072, in obtain\n",
      "      return installer(requirement)\n",
      "    File \"c:\\users\\administrator\\anaconda3\\lib\\site-packages\\setuptools\\dist.py\", line 581, in fetch_build_egg\n",
      "      return cmd.easy_install(req)\n",
      "    File \"c:\\users\\administrator\\anaconda3\\lib\\site-packages\\setuptools\\command\\easy_install.py\", line 670, in easy_install\n",
      "      raise DistutilsError(msg)\n",
      "  distutils.errors.DistutilsError: Could not find suitable distribution for Requirement.parse('setuptools_scm')\n",
      "  [end of output]\n",
      "  \n",
      "  note: This error originates from a subprocess, and is likely not a problem with pip.\n",
      "error: metadata-generation-failed\n",
      "\n",
      "Encountered error while generating package metadata.\n",
      "\n",
      "See above for output.\n",
      "\n",
      "note: This is an issue with the package mentioned above, not pip.\n",
      "hint: See above for details.\n"
     ]
    },
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named 'paddlehub'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-9-4e8bd95c63fd>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[0mget_ipython\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msystem\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'pip install -i https://pypi.tuna.tsinghua.edu.cn/simple paddlehub'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mpaddlehub\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdatasets\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbase_nlp_dataset\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mTextClassificationDataset\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      4\u001b[0m \u001b[1;32mclass\u001b[0m \u001b[0mMyDataset\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mTextClassificationDataset\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m     \u001b[0mbase_path\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'dataset'\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'paddlehub'"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "\n",
    "# Install with the interpreter that backs this kernel; a bare `pip` on PATH may\n",
    "# belong to a different environment, leaving the kernel without the package.\n",
    "# setuptools_scm is installed first on purpose: seqeval (pulled in through\n",
    "# paddlenlp) declares it in setup_requires, and setuptools otherwise tries to\n",
    "# download it from pypi.org instead of the mirror, which fails here with\n",
    "# CERTIFICATE_VERIFY_FAILED and aborts the whole paddlehub install.\n",
    "!\"{sys.executable}\" -m pip install -i https://pypi.tuna.tsinghua.edu.cn/simple setuptools_scm\n",
    "# Pinned to the release the resolver selected, so re-runs are reproducible.\n",
    "!\"{sys.executable}\" -m pip install -i https://pypi.tuna.tsinghua.edu.cn/simple paddlehub==2.4.0\n",
    "from paddlehub.datasets.base_nlp_dataset import TextClassificationDataset\n",
    "\n",
    "class MyDataset(TextClassificationDataset):\n",
    "    \"\"\"TextClassificationDataset subclass with base_path 'dataset' and 13 category labels.\n",
    "\n",
    "    Args:\n",
    "        tokenizer: forwarded unchanged to the base-class constructor.\n",
    "        max_seq_len: forwarded unchanged to the base-class constructor (default 128).\n",
    "        mode: selects the data file -- 'train' -> 'train.txt', 'test' -> 'test.txt',\n",
    "            any other value -> 'dev.txt'. Also forwarded to the base class as-is.\n",
    "    \"\"\"\n",
    "\n",
    "    base_path = 'dataset'\n",
    "    label_list = [\n",
    "        '体育', '科技', '社会', '娱乐', '股票', '房产', '教育',\n",
    "        '时政', '财经', '游戏', '家居', '彩票', '时尚',\n",
    "    ]\n",
    "\n",
    "    def __init__(self, tokenizer, max_seq_len: int = 128, mode: str = 'train'):\n",
    "        # Start from the dev file and override it only for the two named splits,\n",
    "        # so every unrecognised mode value ends up on 'dev.txt'.\n",
    "        split_file = 'dev.txt'\n",
    "        if mode == 'train':\n",
    "            split_file = 'train.txt'\n",
    "        elif mode == 'test':\n",
    "            split_file = 'test.txt'\n",
    "\n",
    "        super().__init__(\n",
    "            base_path=self.base_path,\n",
    "            tokenizer=tokenizer,\n",
    "            max_seq_len=max_seq_len,\n",
    "            mode=mode,\n",
    "            data_file=split_file,\n",
    "            label_list=self.label_list,\n",
    "            # NOTE(review): presumably makes the base class skip a header row in\n",
    "            # the data file -- confirm against the paddlehub documentation.\n",
    "            is_file_with_header=True,\n",
    "        )\n",
    "        \n",
    "import paddlehub as hub\n",
    "\n",
    "# Request the 'ernie_tiny' module for the 'seq-cls' task, with one output class\n",
    "# per entry in MyDataset.label_list.\n",
    "model = hub.Module(\n",
    "    name='ernie_tiny',\n",
    "    task='seq-cls',\n",
    "    num_classes=len(MyDataset.label_list),\n",
    ")\n",
    "\n",
    "# The tokenizer comes from the module itself and is handed to the dataset.\n",
    "tokenizer = model.get_tokenizer()\n",
    "\n",
    "# Default arguments apply: max_seq_len=128, mode='train' (i.e. 'train.txt').\n",
    "train_dataset = MyDataset(tokenizer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
